import jieba
from wordcloud import WordCloud
import numpy as np

import PIL.Image as Image

# Generic/noisy tokens to drop from both the frequency table and the
# word cloud (filler words produced by jieba's segmentation).
excludes = {"什么", "一个", "手机", "说道"}

# Read the full novel text; `with` guarantees the handle is closed
# even if reading fails.
with open("./asssets/hlm.txt", "r", encoding='utf-8') as f:
    txt = f.read()

words = jieba.lcut(txt)  # segment the Chinese text into a word list

# Count word frequencies, skipping single-character segments
# (mostly particles and punctuation, not meaningful words).
counts = {}
for word in words:
    if len(word) > 1:
        counts[word] = counts.get(word, 0) + 1

# Remove excluded words. pop(word, None) avoids the KeyError that
# `del counts[word]` raises when an excluded word never appeared in
# the text (e.g. "手机" cannot occur in this classical novel).
for word in excludes:
    counts.pop(word, None)

# Print the (up to) 100 most frequent words; slicing instead of a
# fixed range(100) avoids IndexError on short inputs. chr(12288) is
# the fullwidth space (U+3000) so CJK columns align.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
for word, count in items[:100]:
    print("{0:{2}<5}出场次数:{1:{2}<5}".format(word, count, chr(12288)))

##############
# Build a word cloud shaped by the mask image.
background = Image.open("./asssets/img.png").convert('RGB')
mask = np.array(background)  # non-white regions define the cloud shape

# Join with spaces so WordCloud's whitespace tokenizer sees each
# segmented word; joining with "" would glue the segments back into
# unspaced Chinese text and break word extraction.
newtxt = " ".join(words)
wordcloud = WordCloud(background_color="white",
                      width=800,
                      height=600,
                      font_path="./asssets/msyh.ttf",  # CJK-capable font, required for Chinese glyphs
                      max_words=200,
                      mask=mask,
                      max_font_size=80,
                      stopwords=excludes,  # same exclusions as the frequency table
                      ).generate(newtxt)
wordcloud.to_file("红楼梦词云.png")

# Display the rendered image. PIL.Image is already imported at the
# top of the file as `Image`; no re-import needed here.
img = Image.open('红楼梦词云.png')
img.show()
